#!/bin/bash

# Collects the cancer-type files of all publishable samples and combines them.
#
# For every entry in FeatureCounts/, the sample ID is the entry's base name and
# its cancer type is the content of CancerTypes/<sampleID>. A sample is kept
# only when that cancer type appears as a complete line in
# TCGA_CancerTypes_Publishable.txt.
#
# Arguments: none
# Reads:     FeatureCounts/*, CancerTypes/<sampleID>,
#            TCGA_CancerTypes_Publishable.txt
# Output:    passes the kept files (as the quoted glob "Temp/CancerTypes/*")
#            and the name PANCAN20_CancerType_Samples.txt to
#            Codes/CombineScalarValues.py
# Scratch:   Temp/CancerTypes is recreated on entry and removed on exit
function buildCancerTypesFile {
  local f sampleID sampleCancerType

  rm -rf Temp/CancerTypes
  mkdir -p Temp/CancerTypes

  for f in FeatureCounts/*
  do
    # An unmatched glob is left as the literal pattern; do not process it.
    [[ -e "$f" ]] || continue

    sampleID=$(basename -- "$f")

    if [[ ! -r "CancerTypes/$sampleID" ]]
    then
      echo "Skipping $sampleID: no readable CancerTypes/$sampleID" >&2
      continue
    fi

    sampleCancerType=$(cat -- "CancerTypes/$sampleID")

    # An empty value can not be looked up, and a multi-line value would be
    # treated by grep as several alternative patterns; neither is a valid
    # single cancer type, so the sample is not publishable.
    if [[ -z "$sampleCancerType" || "$sampleCancerType" == *$'\n'* ]]
    then
      echo "Skipping $sampleID: cancer type is empty or multi-line" >&2
      continue
    fi

    # Make sure we can publish on this sample.
    # -F: literal string, -x: whole-line match, so e.g. "COAD" is not confused
    # with a "COADREAD" entry; -q: only the exit status is needed.
    if grep -Fxq -- "$sampleCancerType" TCGA_CancerTypes_Publishable.txt
    then
      cp -v -- "CancerTypes/$sampleID" Temp/CancerTypes/
    fi
  done

  # The glob is passed quoted on purpose: the Python script receives the
  # pattern itself rather than a shell-expanded file list.
  python Codes/CombineScalarValues.py "Temp/CancerTypes/*" PANCAN20_CancerType_Samples.txt

  rm -rf Temp/CancerTypes
}

# Builds and gzips the matrix file for one per-sample data directory.
#
# Every entry in <subDir>/ is treated as one sample whose ID is the entry's
# base name and whose cancer type is the content of CancerTypes/<sampleID>.
# Only samples whose cancer type appears as a complete line in
# TCGA_CancerTypes_Publishable.txt are staged and handed to
# Codes/BuildMatrixFile.py.
#
# Arguments: $1 - directory holding one file per sample (e.g. RPKM)
# Output:    matrices/PANCAN20.IlluminaHiSeq_RNASeqV2.tumor_Rsubread_<subDir>.txt.gz
# Scratch:   Temp/Summarize_<subDir> is recreated on entry and removed on exit
# Returns:   non-zero when $1 is missing, the matrix build fails, or gzip fails
function matricize {
  local subDir=${1:-}
  local tempSummDir outFile f sampleID sampleCancerType
  local status=0

  # Without this guard an empty argument would make the loop walk "/*".
  if [[ -z "$subDir" ]]
  then
    echo "matricize: missing sub-directory argument" >&2
    return 1
  fi

  tempSummDir=Temp/Summarize_${subDir}
  rm -rf -- "$tempSummDir"
  mkdir -p -- "$tempSummDir"

  for f in "$subDir"/*
  do
    # An unmatched glob is left as the literal pattern; do not process it.
    [[ -e "$f" ]] || continue

    sampleID=$(basename -- "$f")

    if [[ ! -r "CancerTypes/$sampleID" ]]
    then
      echo "Skipping $sampleID: no readable CancerTypes/$sampleID" >&2
      continue
    fi

    sampleCancerType=$(cat -- "CancerTypes/$sampleID")

    # An empty value can not be looked up, and a multi-line value would be
    # treated by grep as several alternative patterns; neither is a valid
    # single cancer type, so the sample must not be published.
    if [[ -z "$sampleCancerType" || "$sampleCancerType" == *$'\n'* ]]
    then
      echo "Skipping $sampleID: cancer type is empty or multi-line" >&2
      continue
    fi

    # Make sure we can publish on this sample.
    # -F: literal string, -x: whole-line match, so e.g. "COAD" is not confused
    # with a "COADREAD" entry; -q: only the exit status is needed.
    if grep -Fxq -- "$sampleCancerType" TCGA_CancerTypes_Publishable.txt
    then
      cp -v -- "$f" "$tempSummDir/"
    fi
  done

  outFile=matrices/PANCAN20.IlluminaHiSeq_RNASeqV2.tumor_Rsubread_${subDir}.txt
  mkdir -p matrices

  # The glob is passed quoted on purpose: the Python script receives the
  # pattern itself rather than a shell-expanded file list.
  if python Codes/BuildMatrixFile.py "$tempSummDir/*" "$outFile"
  then
    python Codes/PrintMatrixDimensions.py "$outFile"

    # Remove a stale archive so gzip does not refuse to overwrite it.
    rm -f -- "$outFile.gz"

    echo "Zipping $outFile"
    gzip -v -- "$outFile" || status=$?
  else
    status=$?
    echo "matricize: building $outFile failed" >&2
  fi

  # Always clean up the staging directory, even after a failure.
  rm -rf -- "$tempSummDir"

  return "$status"
}

# Build the sample/cancer-type listing first, then build the three matrices
# (RPKMlog, RPKM, FeatureCounts) as parallel background jobs.
buildCancerTypesFile

matricizePids=()
matricize RPKMlog &
matricizePids+=("$!")
matricize RPKM &
matricizePids+=("$!")
matricize FeatureCounts &
matricizePids+=("$!")

# A bare `wait` always reports success; waiting on each PID individually
# surfaces the exit status of every background job.
matricizeStatus=0
for matricizePid in "${matricizePids[@]}"
do
  wait "$matricizePid" || matricizeStatus=1
done

# Leave the combined status as the status of the last command so the script
# exits non-zero when any matrix job failed (without forcing an `exit`).
(( matricizeStatus == 0 ))
